<!DOCTYPE html>



  


<html class="theme-next gemini use-motion" lang="zh-CN">
<head>
  <meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<!-- No maximum-scale here: capping the scale stops users from pinch-zooming the page. -->
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="theme-color" content="#222">









<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />















  
  
  <link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />




  
  
  
  

  
    
    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
      
    

    
  

  
    
    
    <!-- Web fonts: explicit https scheme, spaces encoded as "+", each family listed once, "&" escaped. -->
    <link href="https://fonts.googleapis.com/css?family=Microsoft+YaHei:300,300italic,400,400italic,700,700italic|Inziu+Iosevka+Slab+SC:300,300italic,400,400italic,700,700italic&amp;subset=latin,latin-ext" rel="stylesheet" type="text/css">
  






<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />

<link href="/css/main.css?v=5.1.2" rel="stylesheet" type="text/css" />


  <meta name="keywords" content="Hexo, NexT" />








  <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico?v=5.1.2" />






<meta name="description" content="一份不可多得的深度学习技巧指南https://zhuanlan.zhihu.com/p/30486369">
<meta property="og:type" content="article">
<meta property="og:title" content="Guidelines for Deep Learning Skills">
<meta property="og:url" content="http://idmk.oschina.io/2017/10/28/Guidelines-For-Deep-Learning-Skills/index.html">
<meta property="og:site_name" content="苦舟">
<meta property="og:description" content="一份不可多得的深度学习技巧指南https://zhuanlan.zhihu.com/p/30486369">
<meta property="og:locale" content="zh-CN">
<meta property="og:image" content="http://idmk.oschina.io/2017/10/28/Guidelines-For-Deep-Learning-Skills/assets/markdown-img-paste-20171028201502578.png">
<meta property="og:image" content="http://idmk.oschina.io/2017/10/28/Guidelines-For-Deep-Learning-Skills/assets/markdown-img-paste-20171028201726498.png">
<meta property="og:updated_time" content="2017-11-22T15:33:53.913Z">
<meta name="twitter:card" content="summary">
<meta name="twitter:title" content="Guidelines for Deep Learning Skills">
<meta name="twitter:description" content="一份不可多得的深度学习技巧指南https://zhuanlan.zhihu.com/p/30486369">
<meta name="twitter:image" content="http://idmk.oschina.io/2017/10/28/Guidelines-For-Deep-Learning-Skills/assets/markdown-img-paste-20171028201502578.png">



<script type="text/javascript" id="hexo.configurations">
  // Shared theme namespace: reuse the one already on window, otherwise start empty.
  var NexT = window.NexT || {};

  // Page-level settings object emitted for the theme scripts.
  var CONFIG = {
    root: '/',
    scheme: 'Gemini',
    // Sidebar placement and behaviour flags.
    sidebar: {
      "position": "left",
      "display": "hide",
      "offset": 12,
      "offset_float": 12,
      "b2t": false,
      "scrollpercent": false,
      "onmobile": false
    },
    // Boolean feature switches.
    fancybox: true,
    tabs: true,
    motion: true,
    duoshuo: {
      userId: '0',
      author: '博主'
    },
    // Algolia section; the credential fields are empty strings on this page.
    algolia: {
      applicationID: '',
      apiKey: '',
      indexName: '',
      hits: {
        "per_page": 10
      },
      // Text templates; the ${...} placeholders are kept verbatim.
      labels: {
        "input_placeholder": "Search for Posts",
        "hits_empty": "We didn't find any results for the search: ${query}",
        "hits_stats": "${hits} results found in ${time} ms"
      }
    }
  };
</script>



  <link rel="canonical" href="http://idmk.oschina.io/2017/10/28/Guidelines-For-Deep-Learning-Skills/"/>





  <title>Guidelines for Deep Learning Skills | 苦舟</title>
  














</head>

<body itemscope itemtype="http://schema.org/WebPage" lang="zh-CN">

  
  
    
  

  <div class="container sidebar-position-left page-post-detail ">
    <div class="headband"></div>

    <header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-wrapper">
  <div class="site-meta ">
    

    <div class="custom-logo-site-title">
      <a href="/"  class="brand" rel="start">
        <span class="logo-line-before"><i></i></span>
        <span class="site-title">苦舟</span>
        <span class="logo-line-after"><i></i></span>
      </a>
    </div>
      
        <p class="site-subtitle">学海无涯，吾将上下求索。</p>
      
  </div>

  <div class="site-nav-toggle">
    <!-- Icon-only control: explicit type (never a submit button) and an accessible name for assistive technology. -->
    <button type="button" aria-label="切换导航栏">
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
      <span class="btn-bar"></span>
    </button>
  </div>
</div>

<nav class="site-nav">
  

  
    <ul id="menu" class="menu">
      
        
        <li class="menu-item menu-item-home">
          <a href="/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-home"></i> <br />
            
            首页
          </a>
        </li>
      
        
        <li class="menu-item menu-item-categories">
          <a href="/categories/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-th"></i> <br />
            
            分类
          </a>
        </li>
      
        
        <li class="menu-item menu-item-about">
          <a href="/about/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-user"></i> <br />
            
            关于
          </a>
        </li>
      
        
        <li class="menu-item menu-item-archives">
          <a href="/archives/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
            
            归档
          </a>
        </li>
      
        
        <li class="menu-item menu-item-tags">
          <a href="/tags/" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
            
            标签
          </a>
        </li>
      
        
        <li class="menu-item menu-item-commonweal">
          <a href="/404.html" rel="section">
            
              <i class="menu-item-icon fa fa-fw fa-heartbeat"></i> <br />
            
            公益404
          </a>
        </li>
      

      
        <li class="menu-item menu-item-search">
          
            <a href="javascript:;" class="popup-trigger">
          
            
              <i class="menu-item-icon fa fa-search fa-fw"></i> <br />
            
            搜索
          </a>
        </li>
      
    </ul>
  

  
    <div class="site-search">
      
  <div class="popup search-popup local-search-popup">
  <div class="local-search-header clearfix">
    <span class="search-icon">
      <i class="fa fa-search"></i>
    </span>
    <span class="popup-btn-close">
      <i class="fa fa-times-circle"></i>
    </span>
    <div class="local-search-input-wrapper">
      <!-- The placeholder is not a label; aria-label gives the field an accessible name. -->
      <input autocomplete="off"
             placeholder="搜索..." spellcheck="false"
             type="text" id="local-search-input"
             aria-label="搜索">
    </div>
  </div>
  <div id="local-search-result"></div>
</div>



    </div>
  
</nav>



 </div>
    </header>

    <main id="main" class="main">
      <div class="main-inner">
        <div class="content-wrap">
          <div id="content" class="content">
            

  <div id="posts" class="posts-expand">
    

  

  
  
  

  <article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
  
  
  
  <div class="post-block">
    <link itemprop="mainEntityOfPage" href="http://idmk.oschina.io/2017/10/28/Guidelines-For-Deep-Learning-Skills/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="name" content="东木金">
      <meta itemprop="description" content="">
      <meta itemprop="image" content="/uploads/avatar.jpg">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="苦舟">
    </span>

    
      <header class="post-header">

        
        
          <h1 class="post-title" itemprop="name headline">Guidelines for Deep Learning Skills</h1>
        

        <div class="post-meta">
          <span class="post-time">
            
              <span class="post-meta-item-icon">
                <i class="fa fa-calendar-o"></i>
              </span>
              
                <span class="post-meta-item-text">发表于</span>
              
              <time title="创建于" itemprop="dateCreated datePublished" datetime="2017-10-28T18:31:00+08:00">
                2017-10-28
              </time>
            

            

            
          </span>

          

          
            
          

          
          

          

          

          

        </div>
      </header>
    

    
    
    
    <div class="post-body" itemprop="articleBody">

      
      

      
        <ol>
<li>一份不可多得的深度学习技巧指南<br><a href="https://zhuanlan.zhihu.com/p/30486369" target="_blank" rel="external">https://zhuanlan.zhihu.com/p/30486369</a><a id="more"></a>
</li>
</ol>
<h2 id="一份不可多得的深度学习技巧指南"><a href="#一份不可多得的深度学习技巧指南" class="headerlink" title="一份不可多得的深度学习技巧指南"></a>一份不可多得的深度学习技巧指南</h2><p>在本文中，列举了一些常用的机器学习的训练技巧，目的是对这些技巧进行简单的介绍并说明它们的工作原理。</p>
<p>本文的目录如下：</p>
<ul>
<li>数据预处理</li>
<li>初始化</li>
<li>训练</li>
<li>正则化</li>
<li>网络结构</li>
<li>自然语言处理</li>
<li>增强学习</li>
<li>网络压缩</li>
</ul>
<h3 id="数据预处理"><a href="#数据预处理" class="headerlink" title="数据预处理"></a>数据预处理</h3><p>（本部分原作者没有写，以个人的理解及相关补充这部分内容）</p>
<p>What：输入神经网络数据的好坏直接关系着网络训练结果，一般需要对数据进行预处理，常用的数据预处理方式有：</p>
<ul>
<li>去均值：每个原始数据减去全部数据的均值，即把输入数据各个维度的数据都中心化到 0；</li>
<li>归一化：一种方式是使用去均值后的数据除以标准差，另外一种方式是全部数据都除以数据绝对值的最大值；</li>
<li>PCA/ 白化：这是另外一种形式的数据预处理方式，一种方式是降维处理，另外一种是进行方差处理；</li>
</ul>
<p>Why：通过对数据进行预处理能够使得它们对模型的影响具有同样的尺度或其他的一些目的。</p>
<p>Ref：<a href="http://link.zhihu.com/?target=http%3A//cs231n.github.io/neural-networks-2/" target="_blank" rel="external">CS231n Convolutional Neural Networks for Visual Recognition.</a></p>
<h3 id="初始化"><a href="#初始化" class="headerlink" title="初始化"></a>初始化</h3><p>What：权重若初始化合理能够提升性能并加快训练速度，偏置一般设置为 0，对于权重而言，建议统一到一定区间内：</p>
<ul>
<li>对于线性层[1]：区间为[-v,v]，v = 1/sqrt( 输入尺寸 )，sqrt 表示开根号；</li>
<li>对于卷积层[2]：区间为[-v,v]，v = 1/sqrt( 卷积核的宽度 x 卷积核的高度 x 输入深度 )；</li>
<li>批量标准化[3]在某些方面的应用降低了调整权值初始化的需要，一些研究结果也提出了相应的替代公式。</li>
</ul>
<p>Why：使用默认的初始化，每个神经元会随着输入数量的增多而存在一个方差，通过求根号缩放每个权重能确保神经元有近似的输出分布。</p>
<p>Ref：</p>
<ol>
<li><a href="http://link.zhihu.com/?target=http%3A//www.research.microsoft.com/pubs/192769/tricks-2012.pdf" target="_blank" rel="external">Stochastic Gradient Descent Tricks, Leon Bottou</a>；</li>
<li>在 Torch 中默认这么操作；</li>
<li><a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1502.03167%3Fspm%3D5176.100239.blogcont221611.14.AWmFul%26file%3D1502.03167" target="_blank" rel="external">Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift, S. Ioffe and C. Szegedy</a>；</li>
</ol>
<p>What：对于长短期记忆网络（LSTM），遗忘偏置一般设置为 1，可以加快训练过程。<br>Why：直觉是训练开始时，想要信息在细胞之间传播，故不希望细胞忘记它的状态。<br>Ref：<a href="http://link.zhihu.com/?target=http%3A//www.datascienceassn.org/sites/default/files/An%2520Empirical%2520Exploration%2520of%2520Recurrent%2520Network%2520Architectures.pdf%3Fspm%3D5176.100239.blogcont221611.15.AWmFul%26file%3DAn%2520Empirical%2520Exploration%2520of%2520Recurrent%2520Network%2520Architectures.pdf" target="_blank" rel="external">An Empirical Exploration of Recurrent Network Architectures, Rafal Jozefowicz et al</a>.</p>
<p>Ref：</p>
<ol>
<li><a href="http://link.zhihu.com/?target=http%3A//xueshu.baidu.com/s%3Fwd%3Dpaperuri%253A%25287100b5d0e53d5d8ce70cf0dfdc3d9466%2529%26filter%3Dsc_long_sign%26tn%3DSE_xueshusource_2kduw22v%26sc_vurl%3Dhttp%253A%252F%252Fwww.scienceopen.com%252Fdocument%252Fvid%252Fdb04eb2c-31d8-4ee3-abde-d422c86e2bc9%26ie%3Dutf-8%26sc_us%3D43480405131843442" target="_blank" rel="external">Visualizing High-Dimensional Data Using t-SNE, L.J.P. van der Maaten</a>.</li>
<li><a href="http://link.zhihu.com/?target=http%3A//distill.pub/2016/misread-tsne/" target="_blank" rel="external">How to Use t-SNE Effectively, Wattenberg, et al., Distill, 2016</a>.</li>
</ol>
<h3 id="训练"><a href="#训练" class="headerlink" title="训练"></a>训练</h3><p>What：除了使用真值硬化目标外，同样可以使用软化目标（softmax 输出）训练网络。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1503.02531" target="_blank" rel="external">Distilling the Knowledge in a Neural Network / Dark knowledge, G. Hinton et al</a>.<br>What：学习率可能是调参时最重要的一个参数，一种策略是随机选取若干组参数（含学习率），并观察几次迭代后的测试误差。<br><img src="assets/markdown-img-paste-20171028201502578.png" alt=""><br>Ref：Some advice for tuning the hyperparameters. Ref: Goodfellow et al 2016 Book</p>
<h3 id="正则化"><a href="#正则化" class="headerlink" title="正则化"></a>正则化</h3><p>What：<br>在 RNN 中使用 Dropout，它仅仅应用于非循环连接[1]，但是一些最近的文章提出了一些技巧使得 Dropout 能应用于循环连接[2]。<br>Ref：</p>
<ol>
<li><a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1409.2329" target="_blank" rel="external">Recurrent Neural Network Regularization, Wojciech Zaremba et al</a>.</li>
<li><a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1603.05118" target="_blank" rel="external">Recurrent Dropout without Memory Loss, Stanislau Semeniuta et al</a>.</li>
</ol>
<p>What：批量标准化（Batch Normalization, BN），增添了一个新的层，作者给出一些额外的技巧加速 BN 层的工作：</p>
<ul>
<li>增大学习率；</li>
<li>移除 / 减少 dropout：在不增加过拟合发生的条件下加快训练；</li>
<li>移除 / 减少 L2 范数权值归一化；</li>
<li>加快学习率衰减速度：使得网络训练更快；</li>
<li>移除局部响应归一化；</li>
<li>将训练样本打乱得更彻底：防止相同的样本总出现在小批量中（验证集上提高了 1%）；</li>
<li>减少光度失真；</li>
</ul>
<p>Why：<a href="http://link.zhihu.com/?target=https%3A//www.quora.com/Why-does-batch-normalization-help" target="_blank" rel="external">一些好的解释在此</a>。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1502.03167" target="_blank" rel="external">Accelerating Deep Network Training by Reducing Internal Covariate Shift, S. Ioffe and C. Szegedy</a>.</p>
<h3 id="网络结构"><a href="#网络结构" class="headerlink" title="网络结构"></a>网络结构</h3><p>What：使用跳跃式连接，直接将中间层连接到输入 / 输出层。<br>Why：作者的观点是通过减少神经网络的底端与顶端之间的处理步骤使得训练深层网络更加简单，并减轻梯度消失问题。<br>When：在一些 CNN 结构中或 RNN 中一些重要的层。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1308.0850" target="_blank" rel="external">Generating Sequences With Recurrent Neural Networks, Alex Graves et al</a>.</p>
<p><img src="assets/markdown-img-paste-20171028201726498.png" alt="RNN 的跳跃式连接例子"></p>
<p>What：为 LSTM 增加窥视孔连接（连接之前输出到门的输入），根据作者的观点，这个操作对长时间依赖关系有用。<br>Ref：<a href="http://link.zhihu.com/?target=http%3A//www.schraudolph.org/pubs/GerSchSch02.pdf" target="_blank" rel="external">Learning Precise Timing with LSTM Recurrent Networks, Felix A. Gers et al</a>.</p>
<p>What：大多数的深度学习框架提供了一个结合 SoftMax 和 Log 的函数或者是在损失函数中计算 SoftMax（在 Tensorflow 中是 softmax_cross_entropy_with_logits，在 Torch 中是 nn.LogSoftMax），应该优先使用这些函数。<br>Why：Log（SoftMax）在概率很小时数值上不稳定，从而导致溢出等不良结果。另外一种流行的方法是在 Log 中加入一个很小的常数避免不稳定。</p>
<h3 id="自然语言处理（NLP）"><a href="#自然语言处理（NLP）" class="headerlink" title="自然语言处理（NLP）"></a>自然语言处理（NLP）</h3><p>What：对于 RNN 和 seq2seq 模型的一些技巧：</p>
<ul>
<li>嵌入尺寸：1024 或 620。更小的维度比如 256 也能导致很好的表现，但是更高的维度不一定导致更好的表现；</li>
<li>对于译码器而言：LSTM&gt;GRU&gt;Vanilla-RNN；</li>
<li>2-4 层似乎普遍足够，但带有残差的更深网络看起来很难收敛，更多去挖掘更多的技巧；</li>
<li>Resd（密集的残差连接）&gt;Res（近连接先前层）&gt; 无残差连接；</li>
<li>对于编码器而言：双向 &gt; 单向（反向输入）&gt; 单向；</li>
<li>注意力（加法）&gt; 注意力（乘法）&gt; 无注意力；</li>
<li>使用束搜索（beam search）会带来更好的结果；</li>
</ul>
<p>Ref：<a href="http://link.zhihu.com/?target=http%3A//xueshu.baidu.com/s%3Fwd%3Dpaperuri%253A%252822abe28be9b3324dd71e40bb62ecc079%2529%26filter%3Dsc_long_sign%26tn%3DSE_xueshusource_2kduw22v%26sc_vurl%3Dhttp%253A%252F%252Farxiv.org%252Fpdf%252F1703.03906%26ie%3Dutf-8%26sc_us%3D14069555537333173813" target="_blank" rel="external">Massive Exploration of Neural Machine Translation Architectures, Denny Britz, Anna Goldie et al</a>.</p>
<p>What：对于 seq2seq 而言，翻转输入序列的顺序，保持目标序列的完整。<br>Why：根据作者的观点，这种简单的数据变换极大提升了 LSTM 的性能。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1409.3215%3Fspm%3D5176.100239.blogcont221611.27.AWmFul%26file%3D1409.3215" target="_blank" rel="external">Sequence to Sequence Learning with Neural Networks, Ilya Sutskever et al</a>.</p>
<p>What：对于 seq2seq 而言，为编码器和译码器网络使用不同的权值。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1409.3215" target="_blank" rel="external">Sequence to Sequence Learning with Neural Networks, Ilya Sutskever et al</a>.</p>
<p>What：当训练时，强制更正译码器的输入；在测试时，使用先前的步骤，这使得训练在开始时非常高效，Samy 等人提出了一种基于模型转变的改进方法[1]。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1506.03099" target="_blank" rel="external">1. Scheduled Sampling for Sequence Prediction with Recurrent Neural Networks, Samy Bengio et al</a>.</p>
<p>What：以无监督的方式训练一个网络去预测文本的下一个字符（char-RNN），该网络将学习一种能用来监督任务的表示（比如情感分析）。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1704.01444" target="_blank" rel="external">Learning to Generate Reviews and Discovering Sentiment, Ilya Sutskever et al</a>.</p>
<h3 id="增强学习"><a href="#增强学习" class="headerlink" title="增强学习"></a>增强学习</h3><p>What：异步：以不同的探索策略同时训练多个代理，提升了鲁棒性。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1602.01783" target="_blank" rel="external">Asynchronous Methods for Deep Reinforcement Learning, V. Mnih</a>.</p>
<p>What：跳帧：每隔 4 帧计算一次动作，而不是每帧都计算，对于其它帧，重复这个动作。<br>Why：在 Atari 游戏中工作得很好，并且使用这个技巧以大约 4 倍的速度加快了训练过程。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1312.5602" target="_blank" rel="external">Playing Atari with Deep Reinforcement Learning, V. Mnih</a>.</p>
<p>What：历史：不是仅仅将当前帧作为输入，而是将最后的帧与输入叠加，结合间隔为 4 的跳帧，这意味着我们有一个含 t、t-4、t-8 及 t-12 的帧栈。<br>Why：这允许网络有一些动量信息。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1509.06461" target="_blank" rel="external">Deep Reinforcement Learning with Double Q-learning, V. Mnih</a>.</p>
<p>What：经验回放：为了避免帧间的相关性，作为一个代理不是更新每一帧，最好是在过渡时期的历史中采样一些样本，该思想类似于有监督学习中训练前打乱数据集。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1511.05952" target="_blank" rel="external">Prioritized Experience Replay, Tom Schaul et al</a>.</p>
<p>What：Parallel Advantage Actor Critic(PAAC)：通过代理的经验以及使用一个单一的同步更新模型使得简化 A3C 算法成为可能。<br>Ref：<a href="http://link.zhihu.com/?target=https%3A//arxiv.org/abs/1705.04862v2" target="_blank" rel="external">Efficient Parallel Methods for Deep Reinforcement Learning, Alfredo V. Clemente et al</a>.</p>
<h3 id="网络压缩"><a href="#网络压缩" class="headerlink" title="网络压缩"></a>网络压缩</h3><p>What：在推理中，为了减少层数，通过批量归一化（BN）层能够吸收其它的权值。这是因为在测试时批量归一化进行的是一个简单的线性缩放。</p>

      
    </div>
    
    
    

    

    

    

    <footer class="post-footer">
      

      
      
      

      
        <div class="post-nav">
          <div class="post-nav-next post-nav-item">
            
              <a href="/2017/10/23/Wavelet-Transform/" rel="next" title="Wavelet Transform">
                <i class="fa fa-chevron-left"></i> Wavelet Transform
              </a>
            
          </div>

          <span class="post-nav-divider"></span>

          <div class="post-nav-prev post-nav-item">
            
              <a href="/2017/10/28/Git-Tutorial-liaoxuefeng/" rel="prev" title="Git Tutorial Liaoxuefeng">
                Git Tutorial Liaoxuefeng <i class="fa fa-chevron-right"></i>
              </a>
            
          </div>
        </div>
      

      
      
    </footer>
  </div>
  
  
  
  </article>



    <div class="post-spread">
      
    </div>
  </div>


          </div>
          


          
  <div class="comments" id="comments">
    
  </div>


        </div>
        
          
  
  <div class="sidebar-toggle">
    <div class="sidebar-toggle-line-wrap">
      <span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
      <span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
    </div>
  </div>

  <aside id="sidebar" class="sidebar">
    
    <div class="sidebar-inner">

      

      
        <ul class="sidebar-nav motion-element">
          <li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap" >
            文章目录
          </li>
          <li class="sidebar-nav-overview" data-target="site-overview">
            站点概览
          </li>
        </ul>
      

      <section class="site-overview sidebar-panel">
        <div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
          <img class="site-author-image" itemprop="image"
               src="/uploads/avatar.jpg"
               alt="东木金" />
          <p class="site-author-name" itemprop="name">东木金</p>
           
              <p class="site-description motion-element" itemprop="description">正在学习机器学习，希望能变得很强！</p>
          
        </div>
        <nav class="site-state motion-element">

          
            <div class="site-state-item site-state-posts">
              <a href="/archives/">
                <span class="site-state-item-count">162</span>
                <span class="site-state-item-name">日志</span>
              </a>
            </div>
          

          
            
            
            <div class="site-state-item site-state-categories">
              <a href="/categories/index.html">
                <span class="site-state-item-count">18</span>
                <span class="site-state-item-name">分类</span>
              </a>
            </div>
          

          
            
            
            <div class="site-state-item site-state-tags">
              <a href="/tags/index.html">
                <span class="site-state-item-count">42</span>
                <span class="site-state-item-name">标签</span>
              </a>
            </div>
          

        </nav>

        

        <div class="links-of-author motion-element">
          
            
              <!-- Links opened with target="_blank" carry rel="noopener" so the new tab cannot reach this page through window.opener. -->
              <span class="links-of-author-item">
                <a href="https://github.com/bdmk" target="_blank" rel="noopener" title="GitHub">
                    <i class="fa fa-fw fa-github"></i>
                      GitHub
                </a>
              </span>
            
              <span class="links-of-author-item">
                <a href="mailto:catcherchan94@outlook.com" target="_blank" rel="noopener" title="E-Mail">
                    <i class="fa fa-fw fa-envelope"></i>
                      E-Mail
                </a>
              </span>
            
          
        </div>

        
        

        
        

        


      </section>

      
      <!--noindex-->
        <section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
          <div class="post-toc">

            
              
            

            
              <div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-2"><a class="nav-link" href="#一份不可多得的深度学习技巧指南"><span class="nav-number">1.</span> <span class="nav-text">一份不可多得的深度学习技巧指南</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#数据预处理"><span class="nav-number">1.1.</span> <span class="nav-text">数据预处理</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#初始化"><span class="nav-number">1.2.</span> <span class="nav-text">初始化</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#训练"><span class="nav-number">1.3.</span> <span class="nav-text">训练</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#正则化"><span class="nav-number">1.4.</span> <span class="nav-text">正则化</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#网络结构"><span class="nav-number">1.5.</span> <span class="nav-text">网络结构</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#自然语言处理（NLP）"><span class="nav-number">1.6.</span> <span class="nav-text">自然语言处理（NLP）</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#增强学习"><span class="nav-number">1.7.</span> <span class="nav-text">增强学习</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#网络压缩"><span class="nav-number">1.8.</span> <span class="nav-text">网络压缩</span></a></li></ol></li></ol></div>
            

          </div>
        </section>
      <!--/noindex-->
      

      

    </div>
  </aside>


        
      </div>
    </main>

    <footer id="footer" class="footer">
      <div class="footer-inner">
        <div class="copyright" >
  
  &copy;  2017 - 
  <span itemprop="copyrightYear">2018</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">东木金</span>
</div>


<div class="powered-by">
  由 <a class="theme-link" href="https://hexo.io">Hexo</a> 强力驱动
</div>

<div class="theme-info">
  主题 -
  <a class="theme-link" href="https://github.com/iissnan/hexo-theme-next">
    NexT.Gemini
  </a>
</div>


        

        
      </div>
    </footer>

    
      <div class="back-to-top">
        <i class="fa fa-arrow-up"></i>
        
      </div>
    

  </div>

  

<script type="text/javascript">
  // Null out window.Promise unless Object.prototype.toString reports it as
  // '[object Function]'.
  // NOTE(review): presumably this lets later scripts treat a non-function
  // Promise as absent — confirm against the libraries loaded below.
  if (Object.prototype.toString.call(window.Promise) !== '[object Function]') {
    window.Promise = null;
  }
</script>









  












  
  <script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>

  
  <script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>

  
  <script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>

  
  <script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>


  


  <script type="text/javascript" src="/js/src/utils.js?v=5.1.2"></script>

  <script type="text/javascript" src="/js/src/motion.js?v=5.1.2"></script>



  
  


  <script type="text/javascript" src="/js/src/affix.js?v=5.1.2"></script>

  <script type="text/javascript" src="/js/src/schemes/pisces.js?v=5.1.2"></script>



  
  <script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.2"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.2"></script>



  


  <script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.2"></script>



  


  




	





  





  






  

  <script type="text/javascript">
    // Local-search popup state.
    // isfetched stays false until the search index request has succeeded.
    var isfetched = false;
    // Location of the search index, relative to the site root.
    var search_path = "search.xml";
    if (!search_path.length) {
      // Empty setting: fall back to the default index file.
      search_path = "search.xml";
    }
    // The index is read as XML unless its name ends in "json" (case-insensitive).
    var isXml = !/json$/i.test(search_path);
    var path = "/" + search_path;
    // monitor main search box;

    // Dismiss the search popup: hide it, clear the query box, remove the
    // rendered results and the overlay, and clear the inline overflow on body.
    var onPopupClose = function (e) {
      var $page = $('body');
      var $queryBox = $('#local-search-input');

      // Remove what the search rendered, then the dimming overlay.
      $('.search-result-list').remove();
      $('#no-result').remove();
      $(".local-search-pop-overlay").remove();

      $queryBox.val('');
      $('.popup').hide();
      // An empty value drops the inline overflow style set when the popup opened.
      $page.css('overflow', '');
    };

    // Open the search popup: add a click-to-close overlay, lock body scrolling,
    // toggle the popup and move focus into the query box.
    function proceedsearch() {
      var $page = $("body");
      $page.append('<div class="search-popup-overlay local-search-pop-overlay"></div>');
      $page.css('overflow', 'hidden');

      // Clicking the overlay closes the popup.
      $('.search-popup-overlay').click(onPopupClose);
      $('.popup').toggle();

      // Switch off auto-capitalisation and auto-correction on the query box, then focus it.
      $('#local-search-input')
        .attr("autocapitalize", "none")
        .attr("autocorrect", "off")
        .focus();
    }

    // search function;
    var searchFunc = function(path, search_id, content_id) {
      'use strict';

      // start loading animation
      $("body")
        .append('<div class="search-popup-overlay local-search-pop-overlay">' +
          '<div id="search-loading-icon">' +
          '<i class="fa fa-spinner fa-pulse fa-5x fa-fw"></i>' +
          '</div>' +
          '</div>')
        .css('overflow', 'hidden');
      $("#search-loading-icon").css('margin', '20% auto 0 auto').css('text-align', 'center');

      $.ajax({
        url: path,
        dataType: isXml ? "xml" : "json",
        async: true,
        success: function(res) {
          // get the contents from search data
          isfetched = true;
          $('.popup').detach().appendTo('.header-inner');
          var datas = isXml ? $("entry", res).map(function() {
            return {
              title: $("title", this).text(),
              content: $("content",this).text(),
              url: $("url" , this).text()
            };
          }).get() : res;
          var input = document.getElementById(search_id);
          var resultContent = document.getElementById(content_id);
          var inputEventFunction = function() {
            var searchText = input.value.trim().toLowerCase();
            var keywords = searchText.split(/[\s\-]+/);
            if (keywords.length > 1) {
              keywords.push(searchText);
            }
            var resultItems = [];
            if (searchText.length > 0) {
              // perform local searching
              datas.forEach(function(data) {
                var isMatch = false;
                var hitCount = 0;
                var searchTextCount = 0;
                var title = data.title.trim();
                var titleInLowerCase = title.toLowerCase();
                var content = data.content.trim().replace(/<[^>]+>/g,"");
                var contentInLowerCase = content.toLowerCase();
                var articleUrl = decodeURIComponent(data.url);
                var indexOfTitle = [];
                var indexOfContent = [];
                // only match articles with not empty titles
                if(title != '') {
                  keywords.forEach(function(keyword) {
                    function getIndexByWord(word, text, caseSensitive) {
                      var wordLen = word.length;
                      if (wordLen === 0) {
                        return [];
                      }
                      var startPosition = 0, position = [], index = [];
                      if (!caseSensitive) {
                        text = text.toLowerCase();
                        word = word.toLowerCase();
                      }
                      while ((position = text.indexOf(word, startPosition)) > -1) {
                        index.push({position: position, word: word});
                        startPosition = position + wordLen;
                      }
                      return index;
                    }

                    indexOfTitle = indexOfTitle.concat(getIndexByWord(keyword, titleInLowerCase, false));
                    indexOfContent = indexOfContent.concat(getIndexByWord(keyword, contentInLowerCase, false));
                  });
                  if (indexOfTitle.length > 0 || indexOfContent.length > 0) {
                    isMatch = true;
                    hitCount = indexOfTitle.length + indexOfContent.length;
                  }
                }

                // show search results

                if (isMatch) {
                  // sort index by position of keyword

                  [indexOfTitle, indexOfContent].forEach(function (index) {
                    index.sort(function (itemLeft, itemRight) {
                      if (itemRight.position !== itemLeft.position) {
                        return itemRight.position - itemLeft.position;
                      } else {
                        return itemLeft.word.length - itemRight.word.length;
                      }
                    });
                  });

                  // merge hits into slices

                  /**
                   * Collects the hits that fit inside one slice of text.
                   *
                   * Hits are taken from the END of `index` (which is modified in place)
                   * for as long as the hit ends at or before `end`. After a hit is
                   * accepted, every following hit that starts before the accepted hit
                   * ends is dropped, so the returned hits never overlap.
                   *
                   * Side effect: the number of accepted hits whose word equals the outer
                   * `searchText` is added to the outer `searchTextCount`.
                   *
                   * @param {string} text  Not read by this function.
                   * @param {number} start Slice start, copied into the result unchanged.
                   * @param {number} end   Slice end; a hit is accepted only if it ends at or before it.
                   * @param {Array}  index Non-empty list of {position, word} entries, consumed from the end.
                   * @returns {{hits: Array, start: number, end: number, searchTextCount: number}}
                   */
                  function mergeIntoSlice(text, start, end, index) {
                    var hits = [];
                    var fullMatchesInSlice = 0;
                    var current = index[index.length - 1];
                    var position = current.position;
                    var word = current.word;

                    while (index.length !== 0 && position + word.length <= end) {
                      if (word === searchText) {
                        fullMatchesInSlice += 1;
                      }
                      hits.push({position: position, length: word.length});
                      var hitEnd = position + word.length;

                      // Consume the accepted hit, then discard every entry that begins
                      // before it ends; stop at the first entry that does not overlap.
                      index.pop();
                      while (index.length !== 0) {
                        current = index[index.length - 1];
                        position = current.position;
                        word = current.word;
                        if (hitEnd <= position) {
                          break;
                        }
                        index.pop();
                      }
                    }

                    searchTextCount += fullMatchesInSlice;
                    return {
                      hits: hits,
                      start: start,
                      end: end,
                      searchTextCount: fullMatchesInSlice
                    };
                  }

                  // The title yields at most one slice, spanning the whole title.
                  var slicesOfTitle = indexOfTitle.length != 0
                    ? [mergeIntoSlice(title, 0, title.length, indexOfTitle)]
                    : [];

                  // The body is cut into windows anchored on the hit at the end of the
                  // list: 20 characters before the hit and 80 characters from its start,
                  // widened when the word itself is longer and clamped to the bounds of
                  // the content. mergeIntoSlice removes the hits it uses from
                  // indexOfContent, which is what ends the loop.
                  var slicesOfContent = [];
                  while (indexOfContent.length != 0) {
                    var lastHit = indexOfContent[indexOfContent.length - 1];
                    var hitPosition = lastHit.position;
                    var hitWord = lastHit.word;

                    var sliceStart = hitPosition > 20 ? hitPosition - 20 : 0;
                    var sliceEnd = hitWord.length > 80
                      ? hitPosition + hitWord.length
                      : hitPosition + 80;
                    if (sliceEnd > content.length) {
                      sliceEnd = content.length;
                    }

                    slicesOfContent.push(mergeIntoSlice(content, sliceStart, sliceEnd, indexOfContent));
                  }

                  // sort slices in content by search text's count and hits' count

                  // Ranking: more occurrences of the full search text first, then more
                  // hits overall, then the slice that starts earlier in the content.
                  slicesOfContent.sort(function (sliceA, sliceB) {
                    return (sliceB.searchTextCount - sliceA.searchTextCount) ||
                      (sliceB.hits.length - sliceA.hits.length) ||
                      (sliceA.start - sliceB.start);
                  });

                  // select top N slices in content

                  // Keep at most `upperBound` slices per post. The limit arrives as a
                  // string literal, so it is parsed with an explicit base of 10 to rule
                  // out any other interpretation of the digits. A negative or
                  // unparsable value (NaN) fails the check below and leaves the list
                  // untouched.
                  var upperBound = parseInt('1', 10);
                  if (upperBound >= 0) {
                    slicesOfContent = slicesOfContent.slice(0, upperBound);
                  }

                  // highlight title and content

                  /**
                   * Renders one slice of `text` as an HTML string in which every hit is
                   * wrapped in <b class="search-keyword">...</b>.
                   *
                   * The text between hits is copied as-is (no escaping is applied here).
                   *
                   * @param {string} text  Full text the slice offsets refer to.
                   * @param {{start: number, end: number, hits: Array}} slice
                   *        Slice bounds plus its hits ({position, length}), walked in array order.
                   * @returns {string} Markup covering text[slice.start, slice.end).
                   */
                  function highlightKeyword(text, slice) {
                    var markup = '';
                    var cursor = slice.start;
                    for (var i = 0; i < slice.hits.length; i++) {
                      var hit = slice.hits[i];
                      var hitEnd = hit.position + hit.length;
                      // Plain text up to the hit, then the hit itself wrapped in <b>.
                      markup += text.substring(cursor, hit.position);
                      markup += '<b class="search-keyword">' + text.substring(hit.position, hitEnd) + '</b>';
                      cursor = hitEnd;
                    }
                    // Whatever remains between the last hit and the end of the slice.
                    return markup + text.substring(cursor, slice.end);
                  }

                  // Assemble the <li> for this post: a title link (highlighted when the
                  // title itself contains a hit) followed by one excerpt link per slice.
                  // NOTE(review): articleUrl, title and content are concatenated into the
                  // markup without escaping here - confirm they are sanitised upstream.
                  var titleMarkup = title;
                  if (slicesOfTitle.length != 0) {
                    titleMarkup = highlightKeyword(title, slicesOfTitle[0]);
                  }
                  var resultItem = "<li><a href='" + articleUrl + "' class='search-result-title'>" + titleMarkup + "</a>";

                  slicesOfContent.forEach(function (contentSlice) {
                    var excerpt = highlightKeyword(content, contentSlice);
                    resultItem += "<a href='" + articleUrl + "'>" +
                      "<p class=\"search-result\">" + excerpt +
                      "...</p>" + "</a>";
                  });
                  resultItem += "</li>";

                  // `id` records the insertion order (the list length before this push).
                  resultItems.push({
                    item: resultItem,
                    searchTextCount: searchTextCount,
                    hitCount: hitCount,
                    id: resultItems.length
                  });
                }
              })
            };
            // Render the outcome into the result container.
            // The placeholder icons use an explicit </i> end tag: <i> is not a void
            // element, so the self-closing form "<i ... />" is a parse error in HTML.
            if (keywords.length === 1 && keywords[0] === "") {
              // The only keyword is the empty string: show the search icon.
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-search fa-5x"></i></div>';
            } else if (resultItems.length === 0) {
              // Keywords were given but no post matched: show the frown icon.
              resultContent.innerHTML = '<div id="no-result"><i class="fa fa-frown-o fa-5x"></i></div>';
            } else {
              // Rank posts: more occurrences of the full search text first, then
              // more hits overall, then the higher id (the entry added later).
              resultItems.sort(function (resultLeft, resultRight) {
                if (resultLeft.searchTextCount !== resultRight.searchTextCount) {
                  return resultRight.searchTextCount - resultLeft.searchTextCount;
                } else if (resultLeft.hitCount !== resultRight.hitCount) {
                  return resultRight.hitCount - resultLeft.hitCount;
                } else {
                  return resultRight.id - resultLeft.id;
                }
              });
              // Concatenate the prebuilt <li> fragments into a single list.
              var searchResultList = '<ul class=\"search-result-list\">';
              resultItems.forEach(function (result) {
                searchResultList += result.item;
              });
              searchResultList += "</ul>";
              resultContent.innerHTML = searchResultList;
            }
          }

          // Decide how a search is started. Both operands of the comparison are
          // string literals, so as written the first branch below is always taken
          // (presumably one side was filled in from site configuration - confirm).
          if ('auto' !== 'manual') {
            // Search on a click of the search icon or on key code 13 (Enter).
            $('.search-icon').click(inputEventFunction);
            input.addEventListener('keypress', function (keyEvent) {
              if (keyEvent.keyCode !== 13) {
                return;
              }
              inputEventFunction();
            });
          } else {
            // Search on every change of the input's value.
            input.addEventListener('input', inputEventFunction);
          }

          // Drop the loading overlay and clear the inline overflow style on <body>.
          $(".local-search-pop-overlay").remove();
          $('body').css('overflow', '');

          proceedsearch();
        }
      });
    }

    // handle and trigger popup window;
    $('.popup-trigger').click(function (clickEvent) {
      // Stop the click from bubbling to handlers on ancestor elements.
      clickEvent.stopPropagation();

      // Any value of isfetched other than the literal false goes straight to
      // proceedsearch(); otherwise searchFunc is called with the search path
      // and the ids of the input and result elements.
      if (isfetched !== false) {
        proceedsearch();
        return;
      }
      searchFunc(path, 'local-search-input', 'local-search-result');
    });

    // Clicking the close button is handed to onPopupClose.
    $('.popup-btn-close').click(onPopupClose);

    // Clicks inside the popup do not bubble to handlers on ancestor elements.
    $('.popup').click(function (clickEvent) {
      clickEvent.stopPropagation();
    });

    // Releasing the key with code 27 (Escape) calls onPopupClose, but only
    // while the search popup is visible.
    $(document).on('keyup', function (keyEvent) {
      if (keyEvent.which !== 27) {
        return;
      }
      if ($('.search-popup').is(':visible')) {
        onPopupClose();
      }
    });
  </script>





  

  

  

  
  
    <script type="text/x-mathjax-config">
      // Settings for MathJax's tex2jax preprocessor, which locates TeX math
      // delimiters in the page text.
      MathJax.Hub.Config({
        tex2jax: {
          // Inline math may be delimited by $...$ or by \(...\).
          inlineMath: [ ['$','$'], ["\\(","\\)"]  ],
          // Allow \$ to stand for a literal dollar sign instead of opening math.
          processEscapes: true,
          // Elements whose contents are not scanned for math.
          skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'code']
        }
      });
    </script>

    <script type="text/x-mathjax-config">
      // Queued callback: append the "has-jax" class to the parent node of the
      // source element of every math item that MathJax reports.
      MathJax.Hub.Queue(function () {
        var jaxItems = MathJax.Hub.getAllJax();
        for (var idx = 0; idx < jaxItems.length; idx++) {
          var container = jaxItems[idx].SourceElement().parentNode;
          container.className += ' has-jax';
        }
      });
    </script>
    <!-- MathJax loader. The URL names https explicitly rather than being protocol-relative, so the script is never fetched over plain HTTP and still resolves when the page is opened from a file:// location. -->
    <script type="text/javascript" src="https://cdn.bootcss.com/mathjax/2.7.1/latest.js?config=TeX-AMS-MML_HTMLorMML"></script>
  


  

  

</body>
</html>
